# delimit ;  
capture log close;
set more 1 ; 
* From here on every statement, including each star comment, runs until the next semicolon;
* this is a heavily modified version of the march 18 2002 version, written in april/may 2008;
* to avoid confusion between comments added in april 2008 and comments that existed previously;
* all april 2008 comments are prefaced by "408C:";
*408C:note that all trk* files were replaced with trk2006.dta;

* trk2014 now used, waves 7-12 added July 2014;
* trk2020 now used, waves 13-14 added Sep 2022;
* set this to 1 to run the 2020 sections (indexed as wave 15 below), 0 to skip them;
global INCLUDE2020 1;

* Root folder that contains the hrs directory tree. Exactly one global folder line should be active;
* Settings used on other machines are kept below, commented out, for reference;
*global folder "C:\research";
*global folder "C:\Dropbox\";
global folder "D:\Dropbox";
*global folder "\\econ-san-fs\Home4\uctpkp0\SystemFolder\Windows7\Desktop";
*global folder "\\ad.ucl.ac.uk\homer\zctpmer\Downloads";
*global folder "C:\Users\Jeremy\Dropbox";
*global folder "~/";
*\\ad.ucl.ac.uk\homer\zctpmer\Downloads;

* Directory locals used by every use, merge and save in this file;
* FIX: the two comments just above these locals used to lack a closing semicolon, so the comment;
* ran on through the first local and the local named saved was never defined. Output then went to;
* the working directory instead of the merge folder. Every comment here is now terminated;
*original;
local saved   "$folder\hrs\merge\";
local trackd  "$folder\hrs\tracker\";
local wave1d  "$folder\hrs\wave1\";
local wave2d  "$folder\hrs\wave2\";
local wave3d  "$folder\hrs\wave3\";
local wave4d  "$folder\hrs\wave4\";
local wave5d  "$folder\hrs\wave5\";
local wave6d  "$folder\hrs\wave6\";
local wave7d  "$folder\hrs\wave7\";
local wave8d  "$folder\hrs\wave8\";
local wave9d  "$folder\hrs\wave9\";
local wave10d "$folder\hrs\wave10\";
local wave11d "$folder\hrs\wave11\";
local wave12d "$folder\hrs\wave12\";
local wave13d "$folder\hrs\wave13\";
local wave14d "$folder\hrs\wave14\";
local wave15d "$folder\hrs\wave15\";
local aheadw1 "$folder\hrs\aheadw1\";
local aheadw2 "$folder\hrs\aheadw2\";

/*
         local saved  "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\hrs\merge\";
         local trackd "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\tracker\";
         local wave1d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave1\";
         local wave2d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave2\";
         local wave3d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave3\";
         local wave4d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave4\";
         local wave5d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave5\";
	 local wave6d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave6\";
         local wave7d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave7\";
         local wave8d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave8\";
         local wave9d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave9\";
         local wave10d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave10\";
	 local wave11d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave11\";
	 local wave12d "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\wave12\";
         local aheadw1 "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\aheadw1\";
         local aheadw2 "\\ad.ucl.ac.uk\homer\zctpmer\Downloads\aheadw2\";

		 */
		 
* Logging is switched off here. The commented log line ends at its first semicolon, so the;
* clear that follows it on the same physical line is live code and empties memory;
*log using `saved'demog.log, replace ; clear;
* Remove any programs left over from an earlier run so the definitions below do not clash;
program drop _all;
* NOTE(review): set mem is believed to be ignored by recent Stata releases - confirm before removing;
set mem 300m;

program define _getid_;
     version 6.0;
     * Turn the string identifiers HHID and PN into numeric variables of the same name;
     gen temp=real(HHID);
     drop HHID;
     rename temp HHID;
     gen temp=real(PN);
     drop PN;
     rename temp PN;
     * Person-level identifier built as HHID times 1000 plus PN;
     gen long HHIDPN=HHID*1000+PN;
     * Print a quick range check of the three identifiers;
     summ HHID PN HHIDPN;
     end;

* Build the base demog file from the cross-wave tracker;
* The derived variables are mostly 0-1 indicators, =1 if yes;
* The latest tracker (2020) is used. There were no tracker files from https://hrsdata.isr.umich.edu/data-products/cross-wave-tracker-file for 2018 (as of Sep 2022);
* Previously: TRK2016TR_R.dta;
use `trackd'TRK2020TR_R.dta;

_getid_;
*408C: rather than renaming some of the tracker variables, new variables with the same values are generated, so either one can be called;
gen deathm=EXDEATHMO;
gen deathyr=EXDEATHYR;
replace deathm=.  if (deathm<1|deathm>12);
gen birthm=BIRTHMO;
gen birthyr=BIRTHYR;
replace birthm =. if (birthm<1|birthm>12);
replace birthyr =. if birthyr<1850|birthyr>1999; 
* black is 1 when RACE is 2, missing when RACE is 0, and 0 otherwise;
gen black=0;
replace black=1 if RACE==2;
replace black=. if RACE==0; 
gen hrscoh=STUDY;
* OVHHID and OVPN arrive as strings and are converted to numeric in place;
gen temp=real(OVHHID);
drop OVHHID;
gen OVHHID=temp;
drop temp;
gen temp=real(OVPN);
drop OVPN;
gen OVPN=temp;
drop temp;

* Chris: where obvious, variables starting Q and R added for 2018 and 2020; 
* FIX: the age list used to repeat HAGE four times in the slots for JAGE KAGE LAGE MAGE, which;
* silently dropped those four tracker ages. The list now follows the same A to R prefix sequence;
* as the ALIVE, SUBHH, IWMONTH and IWYEAR lists;
sort HHID PN;
keep HHID PN HHIDPN OVHHID OVPN birthm birthyr black hrscoh  deathm deathyr
AALIVE BALIVE CALIVE DALIVE EALIVE FALIVE GALIVE HALIVE JALIVE KALIVE LALIVE MALIVE NALIVE OALIVE PALIVE QALIVE RALIVE
ASUBHH BSUBHH CSUBHH DSUBHH ESUBHH FSUBHH GSUBHH HSUBHH JSUBHH KSUBHH LSUBHH MSUBHH NSUBHH OSUBHH PSUBHH QSUBHH RSUBHH
AAGE BAGE CAGE DAGE EAGE FAGE GAGE HAGE JAGE KAGE LAGE MAGE NAGE OAGE PAGE QAGE RAGE
AIWMONTH BIWMONTH CIWMONTH DIWMONTH EIWMONTH FIWMONTH GIWMONTH HIWMONTH JIWMONTH KIWMONTH LIWMONTH MIWMONTH NIWMONTH OIWMONTH PIWMONTH QIWMONTH RIWMONTH
AIWYEAR BIWYEAR CIWYEAR DIWYEAR EIWYEAR FIWYEAR GIWYEAR HIWYEAR JIWYEAR KIWYEAR LIWYEAR MIWYEAR NIWYEAR OIWYEAR PIWYEAR QIWYEAR RIWYEAR
EXDEATHMO EXDEATHYR BIRTHMO BIRTHYR GENDER RACE SCHLYRS HISPANIC 
JMARST KMARST LMARST MMARST NMARST OMARST PMARST QMARST RMARST;

save `saved'demog, replace;

summ;

drop _all;


program define _dowave_;
     version 6.0;
     * Creates the per-wave demographic variables, each suffixed with the value of global wave;
     * The caller names the source variables through globals intm intyr deathm deathyr birthm;
     * birthyr school black hispanic male, and gives the year bounds in minintyr and maxintyr;

     * Interview month and year, kept only inside their valid ranges (missing otherwise);
     gen intm$wave=$intm if $intm>=1 & $intm<=12;
     gen intyr$wave=$intyr if $intyr>=$minintyr & $intyr<=$maxintyr;

     * Death date is copied through unchanged;
     gen deathm$wave=$deathm;
     gen deathyr$wave=$deathyr;

     * Birth month and year, again blanked when out of range;
     gen birthm$wave=$birthm if $birthm>=1 & $birthm<=12;
     gen birthyr$wave=$birthyr if $birthyr>=1850 & $birthyr<=$maxintyr;

     * Age in whole years. Uses the raw birth year, and steps back one year when the;
     * interview month falls before the birth month;
     gen age$wave=intyr$wave-$birthyr;
     replace age$wave=age$wave-1 if (intm$wave-birthm$wave<0);

     *408C: values above 20 (such as the NA code 99) or below 0 become missing;
     gen school$wave=$school if $school>=0 & $school<=20;

     *408C: coding matches the tracker RACE variable, which is used for all waves;
     gen black$wave=($black==2);
     replace black$wave=. if $black==.|$black==0;
     *408C: adjustment using the tracker information on Hispanics;
     replace black$wave=0 if $hispanic==0|$hispanic==1|$hispanic==2;

     * male is 1 when the gender variable equals 1, missing when it is missing, else 0;
     gen male$wave=($male==1) if $male~=.;

     * Fractional age from month counts, blanked when any of its four inputs is missing;
     gen birth_in_months$wave=(($birthyr*12)+$birthm);
     gen int_in_months$wave=((intyr$wave*12)+intm$wave);
     gen realage$wave=((int_in_months$wave-birth_in_months$wave)/12);
     replace realage$wave=. if ($birthyr==.|$birthm==.|intyr$wave==.|intm$wave==.);
     end;

*408C: In the old version of this program, trk2006.dta was read in later in the file and its variables were only used for later waves;
*408C: In the new version, we decided to use the trk2006 variables for age, interview month, interview year, birthmonth, birthyear, gender, schooling, and race;
*408C: So the only important variable that we drew from the separate wave files was marriage, and that's because trk2006 doesn't have marriage data before wave7;
*408C: For all of the waves;
*408c: *AGE=AGE, *IWMONTH=INTERVIEW MONTH, *IWYEAR=INTERVIEW YEAR, BIRTHMO=BIRTHMONTH, BIRTHYR=BIRTHYEAR, GENDER=GENDER, RACE=RACE, SCHLYRS=years of school completed;
*408C: Changed all of these variables in the subsequent code to their new names from trk2006;
*408C: For the sake of keeping things less cluttered I did not document these changes in the file as I went;
*408c: prefixes "A, B, C, D, E, F, G, H" refer to years "1992, 1993, 1994, 1995, 1996, 1998, 2000, 2002" respectively; 



****************** HRS WAVE I ******************;
* Load the wave 1 health file and attach the tracker-based demog file by person;
use `wave1d'health;
_getid_;

*408C: the merge comes first so the tracker variables are available to _dowave_;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. Prefix A on the interview variables is the 1992 wave;
global wave=1;
global minintyr=1992; global maxintyr=1993;
global intm "AIWMONTH"; global intyr "AIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from V225. Codes 1, 7 and 8 count as married, 9 and missing are unknown;
* coding checked right;
gen married1=(V225==1|V225==7|V225==8);
replace married1=. if V225==.|V225==9;

drop V*;

*408C: blank the wave 1 variables for subjects erroneously recorded in both wave1 and aheadw1, who are really just in aheadw1;
*408C: these are the subjects with non-zero OVHHID in the tracker, see the OVHHID documentation;
*408C: two subjects have non-zero OVHHID because they remarried. They are left in the sample here and another program removes them from the post-remarriage waves;
foreach v of varlist intm1 intyr1 birthm1 birthyr1 age1 realage1 school1 black1 male1 married1 {;
    replace `v'=. if (AIWMONTH!=. & BIWMONTH!=.);
};

*408C: for OVHHID, 0 is the equivalent of missing. Missing values only appear because of the excess observations counted in both HRS wave1 and aheadw1;
replace OVHHID=0 if OVHHID==.;

sort HHID PN;
save `saved'demog, replace;
drop _all;
****************** HRS WAVE II ******************;
* Load the wave 2 file and attach the accumulated demog file by person;
use `wave2d'w2a; 
_getid_;
*408C: the merge comes first so the tracker variables are available to _dowave_;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix C;
global wave=2;
global minintyr=1994; global maxintyr=1995;
global intm "CIWMONTH"; global intyr "CIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";
* A GENDER code of 0 means sex unknown. The recode is kept in the saved file;
replace GENDER=. if GENDER==0;
global male "GENDER";

_dowave_;

* Married indicator from W200. Codes 1, 7 and 8 count as married, 9 and missing are unknown;
* coding checked right;
gen married2=(W200==1|W200==7|W200==8);
replace married2=. if W200==.|W200==9;

drop W* ;

sort HHID PN;
save `saved'demog, replace;
drop _all;

****************** HRS WAVE III ******************;
* Stage the H96a_r file in tmp so it can be merged onto the H96cs_r file below;
use `wave3d'H96a_r;
_getid_;
sort HHID PN;
save `saved'tmp, replace;
drop _all;

use `wave3d'H96cs_r;
_getid_;
*408C: the demog merge comes first so the tracker variables are available;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

sort HHID PN;
merge HHID PN using `saved'tmp;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix E;
global wave=3;
global minintyr=1996; global maxintyr=1997;
global intm "EIWMONTH"; global intyr "EIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from E256A. Codes 1, 2 and 7 count as married;
* coding checked right;
gen married3=(E256A==1|E256A==2|E256A==7);
replace married3=. if E256A==.;

* Drop every variable starting with E except the four still needed later;
* EALIVE and ESUBHH are parked under temporary names, the two death variables are copied;
rename EALIVE temp;
rename ESUBHH temp1;
gen temp11=EXDEATHMO;
gen temp22=EXDEATHYR;
drop E*;
gen EXDEATHMO=temp11;
gen EXDEATHYR=temp22;
drop temp11 temp22;
rename temp EALIVE;
rename temp1 ESUBHH;

summ;
sort HHID PN;
save `saved'demog, replace;
****************** HRS WAVE IV ******************;
* Join the two wave 4 files (h98cs_r and h98a_r) by person and keep the result in tmp;
use `wave4d'h98cs_r; 
_getid_;
sort HHID PN;
save `saved'tmp, replace;
drop _all;

use `wave4d'h98a_r; 
_getid_;
sort HHID PN;
merge HHID PN using `saved'tmp;
drop _merge;
sort HHID PN;
save `saved'tmp, replace;

*408C: the demog merge comes early so the tracker variables are available;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

*408C:the following comment was written regarding the previous versions of the variables; 
*408C:however, it's still probably relevant;
* note: It doesn't help much to de-code the "INAP" cases for F488 and F490. There
are too many cases and it will just give you missing values. The following is the
"INAP" cases for F488: 
[Q456:CS CONTINUE] IS (5), [Q603:HH1 R TIME STAMP] IS (GT 0), [Q463:PRELOAD
REINTERVIEW HH] IS (1) AND [Q475:WHICH IW 1ST R-2ND R] IS (1) AND [Q459:WHICH HH] IS
(1)AND {When Q19 is (Q453), [Q29:HH1 R YR BIRTH] IS (GT 0000)}, [Q463:PRELOAD
REINTERVIEW HH] IS (1) AND [Q475:WHICH IW 1ST R-2ND R] IS (1) AND [Q459:WHICH HH] IS
(2) AND {When Q72 is(Q453), [Q82:HH2 BIRTH YR] IS (GT 0000)}, [Q475:WHICH IW 1ST
R-2ND R] IS (NE 1); 

* Settings read by _dowave_. The interview variables use prefix F;
global wave=4;
global minintyr=1998; global maxintyr=1999;
global intm "FIWMONTH"; global intyr "FIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from F1071, per HRS email 3/14/02. Only code 1 counts as married;
* Codes 0 and 7 (other, specify) and missing are treated as unknown. coding checked right;
gen married4=(F1071==1);
replace married4=. if F1071==.|F1071==0|F1071==7;

* Drop every variable starting with F except FALIVE and FSUBHH;
rename FALIVE temp;
rename FSUBHH temp1;
drop F*;
rename temp FALIVE;
rename temp1 FSUBHH;

summ;

sort HHID PN;
save `saved'demog, replace;

****************** HRS WAVE V ******************;
*408C: H00CS_R replaces h00_cs, which no longer exists and is the assumed equivalent;
use `wave5d'H00CS_R;
_getid_;
sort HHID PN;
save `saved'tmp, replace;
drop _all;

*408C: H00A_R replaces h00_a, which no longer exists and is the assumed equivalent;
use `wave5d'H00A_R;
_getid_;
sort HHID PN;
merge HHID PN using `saved'tmp;
drop _merge;
sort HHID PN;
save `saved'tmp, replace;

*408C: merge with demog to get access to the tracker variables;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix G;
global wave=5;
global minintyr=2000; global maxintyr=2001;
global intm "GIWMONTH"; global intyr "GIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from G1158. Only code 1 counts as married;
* Code 0, codes above 6 (7 = other, specify) and missing are unknown. coding checked right;
gen married5=(G1158==1);
replace married5=. if G1158==.|G1158==0|G1158>6;

* Drop every variable starting with G except GALIVE, GSUBHH and GENDER;
rename GALIVE temp;
rename GSUBHH temp1;
rename GENDER temp2;
drop G*;
rename temp GALIVE;
rename temp1 GSUBHH;
rename temp2 GENDER;

summ;

sort HHID PN;
save `saved'demog, replace;

*408C:************ADDED WAVE6 HERE************;

*408C: this section was modelled on the wave5 section, recoded to pull the equivalent information out of wave6;

* Join H02A_R and H02B_R by person and keep the result in tmp;
use `wave6d'H02A_R;
_getid_;
sort HHID PN;
save `saved'tmp, replace;
drop _all;

use `wave6d'H02B_R;
_getid_;
sort HHID PN;
merge HHID PN using `saved'tmp;
drop _merge;
sort HHID PN;
save `saved'tmp, replace;

*408C: the demog merge comes first so the tracker variables are available;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix H;
global wave=6;
global minintyr=2002; global maxintyr=2003;
global intm "HIWMONTH"; global intyr "HIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from HMARITAL. Only code 1 counts, codes above 6 and missing are unknown;
gen married6=(HMARITAL==1);
replace married6=. if HMARITAL==.|HMARITAL>6;

* Drop every variable starting with H except the identifiers, HISPANIC, HALIVE and HSUBHH;
rename HALIVE temp;
rename HHID temp1;
rename HHIDPN temp2;
rename HISPANIC temp3;
rename HSUBHH temp4;
drop H*;
rename temp HALIVE;
rename temp1 HHID;
rename temp2 HHIDPN;
rename temp3 HISPANIC;
rename temp4 HSUBHH;

summ;

sort HHID PN;
save `saved'demog, replace;

******************************** WAVE 7-10 *****************************************;
* These waves read no wave file. Everything comes from tracker variables already in memory;
* Wave 7 uses prefix J with years 2004-2005, and each later wave moves on one prefix and two years;

* Settings that are the same for every wave in this group;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";

local wv = 7;
foreach pfx in J K L M {;
    global wave=`wv';
    global minintyr=1990+2*`wv';
    global maxintyr=1991+2*`wv';
    global intm "`pfx'IWMONTH";
    global intyr "`pfx'IWYEAR";

    _dowave_;

    * Married indicator from the tracker MARST variable. Only code 1 counts as married;
    gen married`wv'=0;
    replace married`wv'=1 if `pfx'MARST==1;
    replace married`wv'=. if `pfx'MARST==.|`pfx'MARST>6;

    local wv = `wv' + 1;
};

******************************** WAVE 11-13 *****************************************;
* Tracker-only waves: no wave file is read, all inputs are tracker variables already in memory;
* Prefix N is the 2012 wave, O the 2014 wave and P the 2016 wave;

global wave=11;
global minintyr=2012;
global maxintyr=2013;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global intm "NIWMONTH"; 
global intyr "NIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";
_dowave_;
gen married11=0;
replace married11=1 if NMARST==1; 
replace married11=. if NMARST==.|NMARST>6; 

global wave=12;
global minintyr=2014;
global maxintyr=2015;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global intm "OIWMONTH"; 
global intyr "OIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";
_dowave_;
gen married12=0;
replace married12=1 if OMARST==1; 
replace married12=. if OMARST==.|OMARST>6; 

* FIX: wave 13 used to be a copy of wave 12. It read OIWMONTH and OIWYEAR with bounds 2015-2016;
* even though its marital status comes from PMARST. It now reads the P interview variables, with;
* bounds following the two-year pattern of the earlier waves;
* NOTE(review): tabulate PIWYEAR to confirm that no valid interviews fall after 2017;
global wave=13;
global minintyr=2016;
global maxintyr=2017;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global intm "PIWMONTH"; 
global intyr "PIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";
_dowave_;
gen married13=0;
replace married13=1 if PMARST==1; 
replace married13=. if PMARST==.|PMARST>6; 

******************************** END WAVE 11-13 *****************************************;


********************************  WAVE 14-15 added by Chris*****************************************;
* Tracker-only waves: prefix Q is the 2018 wave and prefix R the 2020 wave;
* FIX: both waves used to read OIWMONTH and OIWYEAR (the 2014 wave) with bounds 2017-2018 and;
* 2019-2020. Their marital status comes from QMARST and RMARST, so they now read the Q and R;
* interview variables, with bounds following the two-year pattern of the earlier waves;
* NOTE(review): tabulate QIWYEAR and RIWYEAR to confirm the upper bounds cover all interviews;
global wave=14;
global minintyr=2018;
global maxintyr=2019;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global intm "QIWMONTH"; 
global intyr "QIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";
_dowave_;
gen married14=0;
replace married14=1 if QMARST==1; 
replace married14=. if QMARST==.|QMARST>6; 

* The 2020 wave is built only when the INCLUDE2020 switch is non-zero;
if $INCLUDE2020 {; 
disp "Doing 2020 processing..." ;
global wave=15;
global minintyr=2020;
global maxintyr=2021;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO";
global birthyr "BIRTHYR";
global intm "RIWMONTH"; 
global intyr "RIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS";
_dowave_;
gen married15=0;
replace married15=1 if RMARST==1; 
replace married15=. if RMARST==.|RMARST>6; 
	};
	

******************************** END WAVE 14-15 *****************************************;

* Store the file with waves 6 to 15 added before the AHEAD files are processed;
sort HHID PN;
save `saved'demog, replace;
drop _all;


****************** AHEAD WAVE I ******************;
* The variables built here carry the suffix a1 and are folded into the wave 1 variables later on;
* This survey actually occurs between HRS waves 1&2 (1993).;
*408C: Changed all letters in variable names to capitals to match new data file;
use `aheadw1'br21; 
_getid_;
*408C: moved merge to the beginning of this section to make the tracker variables available;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix B;
global wave="a1";
global minintyr=1992;
global maxintyr=1994;
global deathm "EXDEATHMO";
global deathyr "EXDEATHYR";
global birthm "BIRTHMO"; 
global birthyr "BIRTHYR"; 
global intm "BIWMONTH"; 
global intyr "BIWYEAR"; 
global black "RACE";
global hispanic "HISPANIC";
global male "GENDER";
global school "SCHLYRS"; 

_dowave_;

* Married indicator from V150. Codes 1 and 2 count as married;
gen marrieda1=0;
replace marrieda1=1 if V150==1|V150==2;
* FIX: people with no V150 answer, which after the merge includes everyone who is not in this;
* AHEAD file, used to be coded 0. That 0 was later copied into a missing married1 and stopped it;
* from being filled from neighbouring waves. Missing is now kept, as in every other wave;
replace marrieda1=. if V150==.;


* The V variables are deliberately left in the file here (the drop is disabled);
*drop V*;


summ;

sort HHID PN;
save `saved'demog, replace;
drop _all;

****************** AHEAD WAVE 2 ******************;
* The variables built here carry the suffix a2 and are folded into the wave 2 variables later on;
* This survey actually occurs between HRS waves 2&3 (1995).;
*408C: A95A_R replaces ahd2a, which no longer exists and is the suspected equivalent;
* Note that the staging file tmp for this section lives in the aheadw2 folder;
use `aheadw2'A95A_R; 
_getid_;
sort HHID PN;
save `aheadw2'tmp, replace;
drop _all;

*408C: A95CS_R replaces ahd2cs, which no longer exists and is the suspected equivalent;
use `aheadw2'A95CS_R; 
_getid_;
sort HHID PN;
merge HHID PN using `aheadw2'tmp;
drop _merge;
sort HHID PN;
save `aheadw2'tmp, replace;

*408C: this merge makes the tracker variables available;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

* Settings read by _dowave_. The interview variables use prefix D;
global wave="a2";
global minintyr=1994; global maxintyr=1996;
global intm "DIWMONTH"; global intyr "DIWYEAR";
global birthm "BIRTHMO"; global birthyr "BIRTHYR";
global deathm "EXDEATHMO"; global deathyr "EXDEATHYR";
global male "GENDER";
global black "RACE";
global hispanic "HISPANIC";
global school "SCHLYRS";

_dowave_;

* Married indicator from D256. Codes 1, 2 and 7 count as married, 0 and missing are unknown;
* checked right;
gen marrieda2=(D256==1|D256==2|D256==7);
replace marrieda2=. if D256==.|D256==0;

* The drop below also removes the tracker variables that start with D, such as DALIVE;
* The second merge with demog presumably exists to bring them back. DALIVE is needed later;
drop D*;
summ;
sort HHID PN;
merge HHID PN using `saved'demog;
drop _merge;

sort HHID PN;

summ HHID* PN* HHIDPN intm* intyr* birthm* birthyr* age* real* married* male*
black* school*;
save `saved'demog, replace;
drop _all;

****************** big imputations**********************;
* Start from a clean state: empty memory, remove the two wave-building programs, reload the merged file;
drop _all;
program drop _dowave_ _getid_;
use `saved'demog;

* Remove observations whose reports disagree between two waves;
program define _check_;
     version 6.0;
     * Compares the wave named in global oldwave with the wave named in global wave;
     * A row is dropped when the two waves disagree on a characteristic (both values present);
     * and the matching tracker variable is missing;
     *408C: with tracker data in use, a disagreement only matters when the tracker value is unavailable;

     gen bydif=abs(birthyr$oldwave-birthyr$wave);
     drop if bydif>0 & bydif<. & BIRTHYR==.;

     gen bmdif=abs(birthm$oldwave-birthm$wave);
     drop if bmdif>0 & bmdif<. & BIRTHMO==.;

     gen mdif=abs(male$oldwave-male$wave);
     drop if mdif>0 & mdif<. & GENDER==.;

     *408C: RACE is the right check here because the black variables are derived from RACE and only missingness matters;
     gen bldif=abs(black$oldwave-black$wave);
     drop if bldif>0 & bldif<. & RACE==.;

     gen schdif=abs(school$oldwave-school$wave);
     drop if schdif>0 & schdif<. & SCHLYRS==.;

     * Removes the five work variables (and anything else whose name ends in dif);
     drop *dif;
     end;

*408C: running _check_ is probably meaningless now that tracker data are used, but it is kept anyway;

* Funny things happening with school4 and schoola2, so print how they compare;
sort schoola2;
gen schdif = abs(school4-schoola2);
summ school4 schoola2 schdif;
by schoola2: summ school4 schdif;
drop schdif;
* Per HRS email 3/19/02: where both are present, the AHEAD wave 2 value replaces the wave 4 value;
replace school4=schoola2 if (school4~=.)&(schoola2~=.);
sort HHID PN;

* Early pairs, including the AHEAD waves, in the original order;
global oldwave=1; global wave=2;
_check_;
global oldwave=2; global wave=3;
_check_;
global oldwave="a1"; global wave="a2";
_check_;
global oldwave=3; global wave=4;
_check_;
global oldwave="a2"; global wave=4;
_check_;

* Consecutive pairs from 4 against 5 up to 13 against 14;
* Chris: the later pairs were added though apparently not required;
forvalues prev = 4/13 {;
    global oldwave=`prev';
    global wave=`prev'+1;
    _check_;
};

if $INCLUDE2020 {;
    global oldwave=14; global wave=15;
    _check_;
};
// ** UPDATE AFTER NEW TRACKER;


* Build one school variable by taking the first non-missing value, scanning from the newest wave back;
* This might need adjusting once we reconcile HRS wave 4 and AHEAD wave 2;
* Chris: extended for 14 and 15. The starting point used to be school13;
if $INCLUDE2020 {;
    gen school = school15;
    replace school=school14 if school==.;
};
else {;
    gen school = school14;
};
* The AHEAD values a2 and a1 sit between the neighbouring HRS waves in the fallback order;
foreach src in 13 12 11 10 9 8 7 6 5 4 3 a2 2 a1 1 {;
    replace school=school`src' if school==.;
};

* Finally, fold AHEAD waves 1 and 2 into HRS waves 1 and 2;

* The surveys should not overlap. twoages flags anyone with an age in both, and its summary should be all zero;
gen twoages = ((age1 ~= .) & (agea1 ~=.)) | ((age2 ~= .) & (agea2 ~=.));
summ twoages;
drop twoages;

* Wherever the HRS value is missing, take the AHEAD value for the same characteristic;
foreach num in 1 2 {;
    foreach stem in intyr intm birthyr birthm realage age married male black school {;
        replace `stem'`num' = `stem'a`num' if `stem'`num'==.;
    };
};

* The AHEAD-suffixed variables are no longer needed;
drop *a1 *a2;



* Fill in missing values for marriage.  Since marital status can change, these imputations are not innocuous;
* Targets are processed from the newest wave down, and a value filled earlier can serve as a source later;

* Wave 15 (only when the 2020 wave is included) looks back through waves 14 to 1;
if $INCLUDE2020 {;
    forvalues src = 14(-1)1 {;
        replace married15=married`src' if married15==.;
    };
};

* Waves 14 and 13 only look backwards, nearest wave first;
forvalues tgt = 14(-1)13 {;
    local start = `tgt' - 1;
    forvalues src = `start'(-1)1 {;
        replace married`tgt'=married`src' if married`tgt'==.;
    };
};

* Waves 12 down to 2 try the previous wave, then the next wave, then the remaining earlier waves;
forvalues tgt = 12(-1)2 {;
    local before = `tgt' - 1;
    local after = `tgt' + 1;
    local rest = `tgt' - 2;
    replace married`tgt'=married`before' if married`tgt'==.;
    replace married`tgt'=married`after' if married`tgt'==.;
    if `rest' >= 1 {;
        forvalues src = `rest'(-1)1 {;
            replace married`tgt'=married`src' if married`tgt'==.;
        };
    };
};

* Wave 1 can only borrow from wave 2;
replace married1=married2 if (married1==.);

*408C: the married variable is set back to missing if the person is deceased or presumed deceased;
*408C: if vital status was "unknown" the person is assumed to be alive;

* Waves 1 and 2 each combine an HRS and an AHEAD vital-status variable;
*408C: a non-missing interview month confirms membership of that survey, so its ALIVE variable is meaningful;
replace married1=. if AALIVE==. & AIWMONTH!=.;
replace married1=. if (BALIVE==.|BALIVE==4|BALIVE==9) & BIWMONTH!=.;
replace married1=. if AALIVE==. & BALIVE==.;
replace married2=. if (CALIVE==.|CALIVE==5|CALIVE==6|CALIVE==9) & CIWMONTH!=.;
replace married2=. if (DALIVE==.|DALIVE==5|DALIVE==6|DALIVE==9) & DIWMONTH!=.;
replace married2=. if CALIVE==. & DALIVE==.;

* Waves 3 to 14 use prefixes E to Q in order. Codes 5, 6, 9 and missing blank the married value;
local wnum = 3;
foreach pfx in E F G H J K L M N O P Q {;
    replace married`wnum'=. if (`pfx'ALIVE==.|`pfx'ALIVE==5|`pfx'ALIVE==6|`pfx'ALIVE==9);
    local wnum = `wnum' + 1;
};

if $INCLUDE2020 {;
    replace married15=. if (RALIVE==.|RALIVE==5|RALIVE==6|RALIVE==9);
};

*408C: a few observations show up with missing OVHHID and OVPN. That should not happen, because these variables use 0 rather than missing and the documentation says there are only 111 observations with 0, which are already accounted for;
*408C: switching to 0 is fine because 0 means the variable is not relevant for this individual;
replace OVHHID=0 if OVHHID==.;
replace OVPN=0 if OVPN==.;

*  1 person winds up with age1=1992 -- impossible ages like that are blanked in every wave;
forvalues k = 1/14 {;
    replace age`k'=. if age`k'>200;
};
if $INCLUDE2020 {;
    replace age15=. if age15>200;
};

* The same cut-off is applied to the fractional age;
forvalues k = 1/14 {;
    replace realage`k'=. if realage`k'>200;
};
if $INCLUDE2020 {;
    replace realage15=. if realage15>200;
};
* Keep only the final analysis variables, printing their summary first;
sort HHID PN;
local finalvars "HHID* PN* intm* intyr* birthm* birthyr* age* real* married* male* black* school* hrscoh deathyr* deathm*";
summ `finalvars';
keep `finalvars';


save `saved'demog, replace;
cap log close;

* compare summary stats before and after;
* The first table is written from the new file, which is still in memory;
cd "$folder\hrs\merge\compare\" ;
summtab, contvars(_all) mean median range excel excelname(demog.xlsx) replace;

* The second table is written from the earlier file demog.old.dta into the sheet named Old;
cd "$folder\hrs\merge\" ;
use demog.old.dta , clear;
cd "$folder\hrs\merge\compare\" ;
summtab, contvars(_all) mean median range excel excelname(demog.xlsx) sheetname(Old);

* Leave memory and the program space empty;
drop _all;
program drop _all;




